# load necessary packages and data
library(tidyr)
library(dplyr)
library(readr)
library(plotly)
library(ggplot2)
library(jgcricolors)
library(ggsci)
library(Polychrome)

setwd("C:/Users/spei632/Documents/GCAM_industry/food_processing")
plot_dir <- "C:/Users/spei632/Documents/GCAM_industry/food_processing/initial_exploration/figures"

# load data
IEA_en_bal <- read_csv("initial_exploration/data/L100.IEA_en_bal_ctry_hist.csv")
comtrade_food_pro <- bind_rows(read_csv("initial_exploration/data/comtrade/comtrade_2015_1601-1905.csv"),
                               read_csv("initial_exploration/data/comtrade/comtrade_2015_2001-2205.csv"),
                               read_csv("initial_exploration/data/comtrade/comtrade_2015_2206-2404.csv"))
CostShare_FoodProcessing_GTAP <- read_csv("initial_exploration/data/CostShare_FoodProcessing_GTAP.csv")
L203.StubCalorieContent <- read_csv("initial_exploration/data/L203.StubCalorieContent_xz_cropremap_branch.csv")
L203.StubTechProd_food <- read_csv("initial_exploration/data/L203.StubTechProd_food_xz_cropremap_branch.csv")
L102.pcgdp_thous90USD_Scen_R_Y <- read_csv("initial_exploration/data/L102.pcgdp_thous90USD_Scen_R_Y.csv", skip = 2)


# set constants
CONV_KTOE_EJ <- 0.00004186
hist_years <- c(1990:2015)
gcam_hist_years <- c(1990, 2005, 2010, 2015)
conv_P_k <- 10^12

# mappings
IEA_product_fuel <- readr::read_csv("mappings/IEA_product_fuel.csv", skip = 7)
enduse_fuel_aggregation <- readr::read_csv("mappings/enduse_fuel_aggregation.csv", skip = 5)
IEA_ctry <- readr::read_csv("mappings/iea_ctry.csv", skip = 5)
iso_GCAM_regID <- readr::read_csv("mappings/iso_GCAM_regID.csv", skip = 6)
GCAM_region_names <- readr::read_csv("mappings/GCAM_region_names.csv", skip = 6)

# colors and palettes
data(palette36)
palette36 <- unname(palette36)
pal_sel <- jgcricol()$pal_all

Looking at IEA food processing data

First looking at IEA data, both by countries and by GCAM regions.

food_pro_sect <- "FOODPRO"

IEA_food_pro_region_fuel <- IEA_en_bal %>%
  filter(FLOW == food_pro_sect) %>%
  dplyr::select(iso, FLOW, PRODUCT, as.character(hist_years)) %>%
  left_join(select(iso_GCAM_regID, iso, GCAM_region_ID, country_name), by = "iso") %>%
  left_join(select(IEA_product_fuel, fuel, PRODUCT = product), by = "PRODUCT") %>%
  left_join(enduse_fuel_aggregation %>% dplyr::select(fuel, industry)) %>%
  # remap biomass_tradbio to biomass
  mutate(industry = if_else(fuel == "biomass_tradbio", "biomass", industry)) %>%
  # need to adjust a few that have no mapping to industry label for fuels - maybe don't do this?
  # mutate(industry = ifelse(is.na(industry), fuel, industry),
  #        industry = ifelse(industry %in% c("elec_solar CSP", "elec_geothermal"), "electricity", industry)) %>%
  pivot_longer(as.character(hist_years), names_to = "year", values_to = "value") %>%
  rename(fuel_orig = fuel, fuel = industry) %>%
  filter(!is.na(fuel)) %>%
  group_by(iso, country_name, GCAM_region_ID, year, fuel) %>%
  # summarize and convert to EJ
  summarize(value = sum(value, na.rm = TRUE) * CONV_KTOE_EJ) %>%
  ungroup()

IEA_food_pro_GCAM_region_fuel <- IEA_food_pro_region_fuel %>%
  # summarize by GCAM region
  group_by(GCAM_region_ID, year, fuel) %>%
  summarize(value = sum(value)) %>%
  ungroup() %>%
  full_join(GCAM_region_names) %>%
  mutate(year = replace_na(year, 2015))

# plot all regions
ggplot(IEA_food_pro_region_fuel, aes(x = year, y = value, fill = fuel)) +
  geom_col() + 
  facet_wrap(~country_name, scale = "free") + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Food processing energy use (IEA)") + 
  theme(axis.text.x = element_text(size=5)) + 
  scale_x_discrete(breaks = c(1990, 2000, 2010)) + 
  theme_classic()

ggsave(paste0(plot_dir, "/food_pro_energy_use_by_region_fuel_hist_IEA.png"), width = 15, height = 10)

# just plot 2015
ggplot(IEA_food_pro_region_fuel %>% filter(year == 2015), aes(x = country_name, y = value, fill = fuel)) +
  geom_col() + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Food processing energy use in 2015 (IEA)") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=8, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_pro_energy_use_by_region_fuel_2015_IEA.png"), width = 10, height = 6)

# plot by GCAM region
ggplot(IEA_food_pro_GCAM_region_fuel, aes(x = year, y = value, fill = fuel)) +
  geom_col() + 
  facet_wrap(~region, scale = "free") + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Food processing energy use (IEA)") + 
  theme(axis.text.x = element_text(size=5)) + 
  scale_x_discrete(breaks = c(1990, 2000, 2010)) + 
  theme_classic()

ggsave(paste0(plot_dir, "/food_pro_energy_use_by_GCAM_region_fuel_hist_IEA.png"), width = 15, height = 10)

# just plot 2015
ggplot(IEA_food_pro_GCAM_region_fuel %>% filter(year == 2015), aes(x = region, y = value, fill = fuel)) +
  geom_col() + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Food processing energy use in 2015 (IEA)") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_pro_energy_use_by_GCAM_region_fuel_2015_IEA.png"), width = 10, height = 6)

# global
ggplot(IEA_food_pro_GCAM_region_fuel, aes(x = year, y = value, fill = fuel)) +
  geom_col() + 
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) + 
  labs(x = "", y = "EJ", title = "Global food processing energy use (IEA)") + 
  theme(axis.text.x = element_text(size=5)) + 
  scale_x_discrete(breaks = c(1990, 2000, 2010)) + 
  theme_classic()

ggsave(paste0(plot_dir, "/food_pro_energy_use_by_fuel_global_hist_IEA.png"), width = 7, height = 5)

# 2015 pie charts - breakdown by region and fuel
IEA_food_pro_GCAM_region_fuel <- IEA_food_pro_GCAM_region_fuel %>%
  group_by(year, region) %>%
  mutate(share = value / sum(value, na.rm = TRUE)) %>%
  ungroup()

ggplot(IEA_food_pro_GCAM_region_fuel %>% filter(year == 2015),
       aes(x="", y = share, fill = fuel)) +
  facet_wrap(~region) + 
  geom_bar(width = 1, stat = "identity") + 
  geom_text(aes(label = paste0(round(share * 100, 0), "%")),
            position = position_stack(vjust = 0.5),
            size = 2) + 
  coord_polar("y", start = 0) +
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
  labs(x = "", y = "", title = "Food processing energy use in 2015 (IEA)") + 
  theme_bw() + 
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        panel.grid  = element_blank())

ggsave(paste0(plot_dir, "/food_pro_energy_use_by_GCAM_region_fuel_2015_pie_IEA.png"), width = 12, height = 12)

IEA_food_pro_global_fuel_share <- IEA_food_pro_GCAM_region_fuel %>%
  group_by(year, fuel) %>%
  summarize(value = sum(value, na.rm = TRUE)) %>%
  filter(!is.na(fuel)) %>%
  group_by(year) %>%
  mutate(share = value / sum(value)) %>%
  ungroup()

ggplot(IEA_food_pro_global_fuel_share %>% filter(year == 2015),
       aes(x="", y = share, fill = fuel)) +
  geom_bar(width = 1, stat = "identity") + 
  geom_text(aes(label = paste0(round(share * 100, 0), "%")),
            position = position_stack(vjust = 0.5),
            size = 2) + 
  coord_polar("y", start = 0) +
  scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
  labs(x = "", y = "", title = "Global food processing energy use in 2015 (IEA)") + 
  theme_bw() + 
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        panel.grid  = element_blank())

ggsave(paste0(plot_dir, "/food_pro_energy_use_by_fuel_global_2015_pie_IEA.png"), width = 5, height = 5)

IEA_food_pro_global_GCAM_region_share <- IEA_food_pro_GCAM_region_fuel %>%
  group_by(year, region) %>%
  summarize(value = sum(value, na.rm = TRUE)) %>%
  group_by(year) %>%
  mutate(share = value / sum(value)) %>%
  ungroup() %>%
  mutate(region_grouped = ifelse(share < 0.01, "Other", region))

ggplot(IEA_food_pro_global_GCAM_region_share %>% filter(year == 2015),
       aes(x="", y = share, fill = region_grouped)) +
  geom_bar(width = 1, stat = "identity") + 
  # geom_text(aes(label = paste0(round(share * 100, 0), "%")),
  #           position = position_stack(vjust = 0.5),
  #           size = 2) + 
  coord_polar("y", start = 0) +
  scale_fill_d3(palette = "category20", name = "Region") +
  labs(x = "", y = "", title = "Food processing energy use in 2015 (IEA)") + 
  theme_bw() + 
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        panel.grid  = element_blank())

ggsave(paste0(plot_dir, "/food_pro_energy_use_by_GCAM_region_global_2015_pie_IEA.png"), width = 5, height = 5)

Identifying the GCAM regions with no energy used in food processing in the IEA data in 2015.

GCAM_regions_with_zero_food_pro_energy_IEA <- unique((IEA_food_pro_global_GCAM_region_share %>% filter(year == 2015 & value == 0))$region)

print("GCAM regions with no energy used in food processing in IEA data in 2015:")
## [1] "GCAM regions with no energy used in food processing in IEA data in 2015:"
print(GCAM_regions_with_zero_food_pro_energy_IEA)
## [1] "Pakistan"   "South Asia"

Looking at Comtrade data

comtrade_food_pro_edit <- comtrade_food_pro %>%
  dplyr::select(year = Year, Reporter, `Reporter ISO`, Commodity, `Commodity Code`, Qty, `Qty Unit`, `Trade Value (US$)`) %>%
  mutate(iso = ifelse(`Reporter ISO` == "ROU", "rom", tolower(`Reporter ISO`)))

# get total by region and unit
comtrade_food_pro_total_by_region_units <- comtrade_food_pro_edit %>%
  group_by(year, iso, Reporter, `Qty Unit`) %>%
  summarize(qty = sum(Qty, na.rm = TRUE),
            trade_val_usd = sum(`Trade Value (US$)`, na.rm = TRUE))

# aggregate by GCAM region
comtrade_food_pro_total_by_GCAM_region_units <- comtrade_food_pro_total_by_region_units %>%
  left_join(iso_GCAM_regID) %>%
  group_by(year, GCAM_region_ID, `Qty Unit`) %>%
  summarize(qty = sum(qty),
            trade_val_usd = sum(trade_val_usd)) %>%
  full_join(GCAM_region_names) 

# get just dollar value, aggregating all
comtrade_food_pro_total_by_GCAM_region_units_trade_val_only <- comtrade_food_pro_total_by_GCAM_region_units %>%
  group_by(year, region) %>%
  summarize(trade_val_usd = sum(trade_val_usd))

# see if the regions with no food processing energy use also have no exports
comtrade_total_GCAM_regions_w_zero_IEA <- comtrade_food_pro_total_by_GCAM_region_units %>%
  filter(region %in% GCAM_regions_with_zero_food_pro_energy_IEA)

# get the regions both not in comtrade and with zero in IEA
GCAM_regions_with_zero_food_pro_energy_IEA_and_no_comtrade <- setdiff(GCAM_regions_with_zero_food_pro_energy_IEA, unique(comtrade_total_GCAM_regions_w_zero_IEA$region))

print("GCAM regions with no energy used in food processing in IEA data in 2015 and also no data from Comtrade for 2015: ")
## [1] "GCAM regions with no energy used in food processing in IEA data in 2015 and also no data from Comtrade for 2015: "
print(GCAM_regions_with_zero_food_pro_energy_IEA_and_no_comtrade)
## character(0)
# plot comtrade data
ggplot(comtrade_food_pro_total_by_GCAM_region_units,
       aes(x = region, y = qty, fill = `Qty Unit`)) +
  geom_col() +
  labs(x = "", y = "mixed units (L and kg)", title = "Comtrade quantity of exports of food, 2015") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_pro_exports_qty_GCAM_region_2015_comtrade.png"), width = 10, height = 6)

# plot just dollar value
ggplot(comtrade_food_pro_total_by_GCAM_region_units,
       aes(x = region, y = trade_val_usd, fill = `Qty Unit`)) +
  geom_col() +
  labs(x = "", y = "USD", title = "Comtrade value of exports of food, 2015") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_pro_exports_usd_val_GCAM_region_2015_comtrade.png"), width = 10, height = 6)

So all the regions with no energy use do have exports in Comtrade.

GCAM processed FAO data

Note this is from Xin’s updated branch.

food_prod_GCAM_FAO <- L203.StubTechProd_food %>%
  dplyr::select(region, supplysector, subsector, stub.technology, subsector0, year, calOutputValue) %>%
  left_join(L203.StubCalorieContent %>%
              dplyr::select(-market.name)) %>%
  mutate(inputValue = calOutputValue / efficiency)

# plot the values for efficiency and consumption by region
plot_ly(food_prod_GCAM_FAO %>% filter(year == 2015)) %>%
  add_trace(x = ~stub.technology,
            y = ~calOutputValue,
            type = "scatter",
            mode = "markers",
            color = ~region) %>%
  layout(xaxis= list(title = ""),
         yaxis = list(title = "Pcal"),
         title = "Production of final calories, GCAM-FAO data, 2015")
plot_ly(food_prod_GCAM_FAO %>% filter(year == 2015)) %>%
  add_trace(x = ~stub.technology,
            y = ~efficiency,
            type = "scatter",
            mode = "markers",
            color = ~region) %>%
  layout(xaxis= list(title = ""),
         yaxis = list(title = "kcal/g (or Pcal/Mt) "),
         title = "Efficiency of calories produced per input, GCAM-FAO data, 2015")
plot_ly(food_prod_GCAM_FAO %>% filter(year == 2015)) %>%
  add_trace(x = ~region,
            y = ~efficiency,
            type = "scatter",
            mode = "markers",
            color = ~stub.technology) %>%
  layout(xaxis= list(title = ""),
         yaxis = list(title = "kcal/g (or Pcal/Mt) "),
         title = "Efficiency of calories produced per input, GCAM-FAO data, 2015")
# get global average for each crop
food_prod_GCAM_FAO_global_avg_by_crop <- food_prod_GCAM_FAO %>%
  group_by(supplysector, subsector, stub.technology, subsector0, year, minicam.energy.input) %>%
  summarize(calOutputValue = sum(calOutputValue),
            inputValue = sum(inputValue)) %>%
  ungroup() %>%
  mutate(efficiency = calOutputValue / inputValue)

# aggregate across nonstaples and staples and total
food_prod_GCAM_FAO_agg_sector <- food_prod_GCAM_FAO %>%
  group_by(region, supplysector, year) %>%
  summarize(calOutputValue = sum(calOutputValue),
            inputValue = sum(inputValue)) %>%
  ungroup() %>%
  mutate(efficiency = calOutputValue / inputValue) %>%
  group_by(region, year) %>%
  mutate(calOutputValue_frac_of_total = calOutputValue / sum(calOutputValue)) %>%
  ungroup()

food_prod_GCAM_FAO_agg_all <- food_prod_GCAM_FAO %>%
  group_by(region, year) %>%
  summarize(calOutputValue = sum(calOutputValue),
            inputValue = sum(inputValue)) %>%
  ungroup() %>%
  mutate(efficiency = calOutputValue / inputValue)

plot_ly(food_prod_GCAM_FAO_agg_sector %>% filter(year == 2015)) %>%
  add_trace(x = ~region,
            y = ~calOutputValue,
            type = "scatter",
            mode = "markers",
            color = ~supplysector) %>%
  layout(xaxis= list(title = ""),
         yaxis = list(title = "Pcal"),
         title = "Production of final calories by region and staples vs nonstaples, GCAM-FAO data, 2015")
plot_ly(food_prod_GCAM_FAO_agg_sector %>% filter(year == 2015)) %>%
  add_trace(x = ~region,
            y = ~efficiency,
            type = "scatter",
            mode = "markers",
            color = ~supplysector) %>%
  layout(xaxis= list(title = ""),
         yaxis = list(title = "kcal/g (or Pcal/Mt) "),
         title = "Efficiency of calories produced per input by region and staples vs nonstaples, GCAM-FAO data, 2015")
plot_ly(food_prod_GCAM_FAO_agg_sector %>% filter(year == 2015)) %>%
  add_trace(x = ~region,
            y = ~calOutputValue_frac_of_total,
            type = "scatter",
            mode = "markers",
            color = ~supplysector) %>%
  layout(xaxis= list(title = ""),
         yaxis = list(title = "kcal/g (or Pcal/Mt) "),
         title = "Fraction of calories produced from staples vs nonstaples, GCAM-FAO data, 2015")
plot_ly(food_prod_GCAM_FAO_agg_all %>% filter(year == 2015)) %>%
  add_trace(x = ~region,
            y = ~calOutputValue,
            type = "bar") %>%
  layout(xaxis= list(title = ""),
         yaxis = list(title = "Pcal"),
         title = "Production of final calories by region, GCAM-FAO data, 2015")
plot_ly(food_prod_GCAM_FAO_agg_all %>% filter(year == 2015)) %>%
  add_trace(x = ~region,
            y = ~efficiency,
            type = "bar") %>%
  layout(xaxis= list(title = "", categoryorder = "total ascending"),
         yaxis = list(title = "kcal/g (or Pcal/Mt) "),
         title = "Efficiency of calories produced per input by region, GCAM-FAO data, 2015")

Now compare the food production to the energy use data from the IEA.

# calculate the energy used per food calorie output
food_prod_GCAM_FAO_agg_all_IEA_total_fuel_comp <- food_prod_GCAM_FAO_agg_all %>%
  filter(year >= 1990) %>%
  rename(cal_efficiency = efficiency) %>%
  left_join(IEA_food_pro_global_GCAM_region_share %>% 
              dplyr::select(year, region, value) %>%
              rename(energy_use_EJ = value) %>%
              mutate(year = as.numeric(year))) %>%
  mutate(energy_per_Pcal = energy_use_EJ / calOutputValue,
         energy_per_kcal = energy_per_Pcal * conv_P_k,
         cal_coefficient = 1 / cal_efficiency,
         energy_per_input_crop = energy_use_EJ / inputValue) %>%
  ungroup()

# plot
plot_ly(food_prod_GCAM_FAO_agg_all_IEA_total_fuel_comp %>% filter(year == 2015)) %>%
  add_trace(x = ~region,
            y = ~energy_per_Pcal,
            type = "bar") %>%
  layout(xaxis = list(title = "", categoryorder = "total ascending"),
         yaxis = list(title = "EJ/Pcal"),
         title = "Food processing energy use (from IEA) per output kcal (FAO-GCAM), 2015")
plot_ly(food_prod_GCAM_FAO_agg_all_IEA_total_fuel_comp %>% filter(year == 2015)) %>%
  add_trace(x = ~region,
            y = ~energy_per_input_crop,
            type = "bar") %>%
  layout(xaxis = list(title = "", categoryorder = "total ascending"),
         yaxis = list(title = "EJ/Mt"),
         title = "Food processing energy use (from IEA) per input Mt (FAO-GCAM), 2015")
# combine the two metrics in one plot
plot_ly(food_prod_GCAM_FAO_agg_all_IEA_total_fuel_comp %>% filter(year == 2015)) %>%
  add_trace(x = ~region,
            y = ~cal_coefficient,
            type = "bar",
            name = "Crop coefficient") %>%
  add_trace(x = ~region,
            y = ~energy_per_Pcal,
            type = "scatter",
            mode = "markers",
            yaxis = "y2",
            name = "Energy coefficient",
            color = I("darkred")) %>%
  layout(xaxis= list(title = "", categoryorder = "total ascending"),
         yaxis = list(title = "Mt/Pcal"),
         yaxis2 = list(overlaying = "y", side = "right", title = "EJ/Pcal", 
                       tickfont = list(color = "darkred"), gridcolor = "darkred", 
                       range = c(0, 0.0065)),
         title = "Energy and crop coefficients for calories produced by region, IEA and GCAM-FAO data, 2015")

Save some figures with ggplot.

ggplot(food_prod_GCAM_FAO_agg_all_IEA_total_fuel_comp %>% filter(year == 2015), aes(x = reorder(region, cal_coefficient, FUN = identity), y = cal_coefficient)) +
  geom_col(fill = "steelblue") + 
  labs(x = "", y = "Mt/Pcal", title = "Coefficient of input crops per output calories by region, GCAM-FAO data, 2015") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_prod_coef_total_by_region_GCAM-FAO_2015.png"), width = 10, height = 6)

ggplot(food_prod_GCAM_FAO_agg_all_IEA_total_fuel_comp %>% filter(year == 2015), aes(x = reorder(region, energy_per_Pcal, FUN = identity), y = energy_per_Pcal)) +
  geom_col(fill = "steelblue") + 
  labs(x = "", y = "EJ/Pcal", title = "Coefficient of food processing energy use per output calories by region, IEA data + GCAM-FAO data, 2015") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_pro_coef_per_output_food_total_by_region_IEA_GCAM-FAO_2015.png"), width = 10, height = 6)

ggplot(food_prod_GCAM_FAO_agg_sector %>% filter(year == 2015), aes(x = region, y = calOutputValue, fill = supplysector)) +
  geom_bar(position = "dodge", stat = "identity") + 
  labs(x = "", y = "Pcal", title = "Output calories by region and staple vs nonstaples, GCAM-FAO data, 2015") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_prod_output_food_staples_nonstaples_by_region_GCAM-FAO_2015.png"), width = 10, height = 6)

ggplot(food_prod_GCAM_FAO_agg_sector %>% filter(year == 2015 & supplysector == "FoodDemand_NonStaples"), aes(x = reorder(region, calOutputValue_frac_of_total, FUN = "identity"), y = calOutputValue_frac_of_total)) +
  geom_bar(stat = "identity", fill = "steelblue") + 
  labs(x = "", y = "Fraction", title = "Fraction of output calories from non-staples by region, GCAM-FAO data, 2015") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_prod_output_food_nonstaples_frac_by_region_GCAM-FAO_2015.png"), width = 10, height = 6)

ggplot(food_prod_GCAM_FAO_agg_sector %>% filter(year == 2015), aes(x = region, y = efficiency, fill = supplysector)) +
  geom_bar(position = "dodge", stat = "identity") + 
  labs(x = "", y = "Pcal/Mt", title = "Food production efficiency (output / input crops) by region and staple vs nonstaples, GCAM-FAO data, 2015") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_prod_food_efficiency_staples_nonstaples_by_region_GCAM-FAO_2015.png"), width = 10, height = 6)

Compare to GDP per capita.

# select just historical GCAM years GDP, using SSP2 scenario (but values should be same across scenarios since they are historical)
GDP_per_cap_hist_GCAM_years <- L102.pcgdp_thous90USD_Scen_R_Y %>%
  filter(scenario == "SSP2" & year %in% gcam_hist_years) %>%
  left_join(GCAM_region_names) %>%
  rename(GDPpc_thous1990USD = value)

# join to food coefficient data
food_prod_GCAM_FAO <- food_prod_GCAM_FAO %>%
  left_join(GDP_per_cap_hist_GCAM_years %>%
              dplyr::select(-GCAM_region_ID, -scenario)) %>%
  ungroup()

food_prod_GCAM_FAO_agg_sector <- food_prod_GCAM_FAO_agg_sector %>%
  left_join(GDP_per_cap_hist_GCAM_years %>%
              dplyr::select(-GCAM_region_ID, -scenario)) %>%
  ungroup()

food_prod_GCAM_FAO_agg_all <- food_prod_GCAM_FAO_agg_all %>%
  left_join(GDP_per_cap_hist_GCAM_years %>%
              dplyr::select(-GCAM_region_ID, -scenario)) %>%
  ungroup()
  
# calculate linear models for relationship between efficiency of crops in per food out vs per capita GDP
plot_GDPpc_efficiency_lin_model <- function(crop, input_data) {
  sel_data <- input_data %>%
    filter(stub.technology == crop & year != 1975)
    # filter(stub.technology == crop & year == 2015)
  
  
  lin_reg <- lm(efficiency ~ GDPpc_thous1990USD, sel_data)
  lin_reg_summary <- summary(lin_reg)
  
  print(crop)
  print(lin_reg_summary)
  
  # get key values
  pval <- lin_reg_summary$coef[2,4]
  Rsq <- lin_reg_summary$adj.r.squared
  slope <- lin_reg_summary$coef[[2]]
  
  # plot
  x_for_lin_reg_plot <- c(min(sel_data$GDPpc_thous1990USD), max(sel_data$GDPpc_thous1990USD))
  plot_ly(sel_data) %>%
    add_trace(x = ~GDPpc_thous1990USD,
              y = ~efficiency,
              color = ~region,
              colors = palette36,
              type = "scatter",
              mode = "markers") %>%
    add_trace(#x = x_for_lin_reg_plot,
              #y = predict(lin_reg, data.frame(GDPpc_thous1990USD = x_for_lin_reg_plot)),
      x = seq(x_for_lin_reg_plot[1], x_for_lin_reg_plot[2]),
              y = predict(lin_reg, data.frame(GDPpc_thous1990USD = seq(x_for_lin_reg_plot[1], x_for_lin_reg_plot[2]))), 
              type = "scatter",
              mode = "lines",
              line = list(dash = ifelse(pval <= 0.05, "solid", "dash")),
              name = "Linear regression",
              text = paste0("R squared: ", round(Rsq, 3), ", P value: ", round(pval, 3))) %>%
    layout(xaxis = list(title = "GDP (thousand 1990$ MER) per capita"),
           yaxis = list(title = "Pcal/Mt"),
           title = paste0(crop, " production efficiency (output / input crops) by GDP per capita\nGCAM-FAO data (1990, 2005, 2010, 2015)"))
  
  # ggplot(sel_data, aes(x = GDPpc_thous1990USD, y = efficiency, color = region)) +
  #   geom_point() +
  #   scale_color_manual(values = palette36, name = "region") + 
  #   labs(x = "GDP per capita (thousand 1990$)", y = "Pcal/Mt", title = paste0(crop, " production efficiency (output / input crops) by GDP per capita, GCAM-FAO data, 2015")) + 
  #   theme_classic()
  # 
  # ggsave(paste0(plot_dir, "/food_prod_food_efficiency_", crop, "_by_GDPpc_GCAM-FAO_2015.png"), width = 10, height = 6)
}

crop_names <- unique(food_prod_GCAM_FAO$stub.technology)

plot_GDPpc_efficiency_lin_model(crop_names[1], food_prod_GCAM_FAO)
## [1] "Corn"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79762 -0.14249  0.04774  0.17450  0.49763 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        3.406335   0.029717 114.627  < 2e-16 ***
## GDPpc_thous1990USD 0.006349   0.001952   3.252  0.00147 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2657 on 126 degrees of freedom
## Multiple R-squared:  0.07742,    Adjusted R-squared:  0.0701 
## F-statistic: 10.57 on 1 and 126 DF,  p-value: 0.001472
plot_GDPpc_efficiency_lin_model(crop_names[2], food_prod_GCAM_FAO)
## [1] "FiberCrop"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83252 -0.39868 -0.00483  0.17259  2.41314 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         6.0568191  0.0715166  84.691   <2e-16 ***
## GDPpc_thous1990USD -0.0002713  0.0046988  -0.058    0.954    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6395 on 126 degrees of freedom
## Multiple R-squared:  2.646e-05,  Adjusted R-squared:  -0.00791 
## F-statistic: 0.003334 on 1 and 126 DF,  p-value: 0.954
plot_GDPpc_efficiency_lin_model(crop_names[3],  food_prod_GCAM_FAO)
## [1] "Fruits"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.20358 -0.03653 -0.01288  0.03152  0.17212 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.4921196  0.0078517  62.677  < 2e-16 ***
## GDPpc_thous1990USD -0.0039654  0.0005159  -7.687 3.72e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07021 on 126 degrees of freedom
## Multiple R-squared:  0.3192, Adjusted R-squared:  0.3138 
## F-statistic: 59.09 on 1 and 126 DF,  p-value: 3.721e-12
plot_GDPpc_efficiency_lin_model(crop_names[4],  food_prod_GCAM_FAO)
## [1] "Legumes"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.123830 -0.028909  0.000325  0.020925  0.080212 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.4265498  0.0049434 693.159   <2e-16 ***
## GDPpc_thous1990USD -0.0006977  0.0003248  -2.148   0.0336 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0442 on 126 degrees of freedom
## Multiple R-squared:  0.03532,    Adjusted R-squared:  0.02767 
## F-statistic: 4.614 on 1 and 126 DF,  p-value: 0.03363
plot_GDPpc_efficiency_lin_model(crop_names[5],  food_prod_GCAM_FAO)
## [1] "MiscCrop"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.15311 -0.40538  0.01268  0.46509  1.84742 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.852053   0.072253  25.633   <2e-16 ***
## GDPpc_thous1990USD -0.005417   0.004747  -1.141    0.256    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6461 on 126 degrees of freedom
## Multiple R-squared:  0.01023,    Adjusted R-squared:  0.002372 
## F-statistic: 1.302 on 1 and 126 DF,  p-value: 0.256
plot_GDPpc_efficiency_lin_model(crop_names[6],  food_prod_GCAM_FAO)
## [1] "NutsSeeds"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69551 -0.17831  0.04959  0.20741  0.79729 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.587637   0.040140  89.378   <2e-16 ***
## GDPpc_thous1990USD -0.004464   0.002637  -1.693    0.093 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3589 on 126 degrees of freedom
## Multiple R-squared:  0.02223,    Adjusted R-squared:  0.01447 
## F-statistic: 2.865 on 1 and 126 DF,  p-value: 0.093
plot_GDPpc_efficiency_lin_model(crop_names[7],  food_prod_GCAM_FAO)
## [1] "OilCrop"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0670 -1.3710 -0.1963  1.5972  2.7308 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         4.72263    0.18614  25.371  < 2e-16 ***
## GDPpc_thous1990USD  0.03431    0.01223   2.805  0.00583 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.664 on 126 degrees of freedom
## Multiple R-squared:  0.05878,    Adjusted R-squared:  0.05131 
## F-statistic: 7.869 on 1 and 126 DF,  p-value: 0.005828
plot_GDPpc_efficiency_lin_model(crop_names[8],  food_prod_GCAM_FAO)
## [1] "Soybean"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6427 -1.5947  0.6564  1.1707  2.5737 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         7.43097    0.16233  45.778  < 2e-16 ***
## GDPpc_thous1990USD -0.04018    0.01067  -3.767 0.000252 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.451 on 126 degrees of freedom
## Multiple R-squared:  0.1012, Adjusted R-squared:  0.0941 
## F-statistic: 14.19 on 1 and 126 DF,  p-value: 0.0002522
plot_GDPpc_efficiency_lin_model(crop_names[9],  food_prod_GCAM_FAO)
## [1] "OtherGrain"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.06497 -0.31634  0.00934  0.24561  1.07253 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.328365   0.054772  60.767   <2e-16 ***
## GDPpc_thous1990USD -0.024138   0.003599  -6.707    6e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4898 on 126 degrees of freedom
## Multiple R-squared:  0.2631, Adjusted R-squared:  0.2573 
## F-statistic: 44.99 on 1 and 126 DF,  p-value: 6.002e-10
plot_GDPpc_efficiency_lin_model(crop_names[10],  food_prod_GCAM_FAO)
## [1] "OilPalm"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31931 -0.02721 -0.00852  0.00426  0.41047 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.3885664  0.0130318 183.288   <2e-16 ***
## GDPpc_thous1990USD 0.0013141  0.0008562   1.535    0.127    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1165 on 126 degrees of freedom
## Multiple R-squared:  0.01835,    Adjusted R-squared:  0.01056 
## F-statistic: 2.355 on 1 and 126 DF,  p-value: 0.1274
plot_GDPpc_efficiency_lin_model(crop_names[11],  food_prod_GCAM_FAO)
## [1] "Rice"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.20931 -0.07904 -0.01466  0.09215  0.26397 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.6931887  0.0118672 226.945   <2e-16 ***
## GDPpc_thous1990USD -0.0001784  0.0007797  -0.229    0.819    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1061 on 126 degrees of freedom
## Multiple R-squared:  0.0004154,  Adjusted R-squared:  -0.007518 
## F-statistic: 0.05236 on 1 and 126 DF,  p-value: 0.8194
plot_GDPpc_efficiency_lin_model(crop_names[12],  food_prod_GCAM_FAO)
## [1] "RootTuber"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.11621 -0.06848 -0.02433  0.04424  0.24398 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.7813509  0.0103256  75.671  < 2e-16 ***
## GDPpc_thous1990USD -0.0033776  0.0006784  -4.979 2.06e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09233 on 126 degrees of freedom
## Multiple R-squared:  0.1644, Adjusted R-squared:  0.1577 
## F-statistic: 24.79 on 1 and 126 DF,  p-value: 2.059e-06
plot_GDPpc_efficiency_lin_model(crop_names[13],  food_prod_GCAM_FAO)
## [1] "SugarCrop"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.165184 -0.027320  0.001692  0.016498  0.156907 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.5517383  0.0060328  91.457  < 2e-16 ***
## GDPpc_thous1990USD 0.0019721  0.0003964   4.975 2.09e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05394 on 126 degrees of freedom
## Multiple R-squared:  0.1642, Adjusted R-squared:  0.1576 
## F-statistic: 24.75 on 1 and 126 DF,  p-value: 2.088e-06
plot_GDPpc_efficiency_lin_model(crop_names[14],  food_prod_GCAM_FAO)
## [1] "Vegetables"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.058323 -0.029551 -0.004327  0.014299  0.099078 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.2927823  0.0041940  69.810  < 2e-16 ***
## GDPpc_thous1990USD -0.0008553  0.0002756  -3.104  0.00236 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0375 on 126 degrees of freedom
## Multiple R-squared:  0.07102,    Adjusted R-squared:  0.06365 
## F-statistic: 9.633 on 1 and 126 DF,  p-value: 0.00236
plot_GDPpc_efficiency_lin_model(crop_names[15],  food_prod_GCAM_FAO)
## [1] "Wheat"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.27693 -0.06809  0.02868  0.08044  0.23395 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.473751   0.012907 269.135   <2e-16 ***
## GDPpc_thous1990USD -0.001451   0.000848  -1.711   0.0895 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1154 on 126 degrees of freedom
## Multiple R-squared:  0.02271,    Adjusted R-squared:  0.01495 
## F-statistic: 2.928 on 1 and 126 DF,  p-value: 0.08952
plot_GDPpc_efficiency_lin_model(crop_names[16],  food_prod_GCAM_FAO)
## [1] "Beef"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7832 -0.2494  0.0104  0.2169  0.6740 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.613779   0.039154  41.217   <2e-16 ***
## GDPpc_thous1990USD -0.005514   0.002573  -2.143    0.034 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3501 on 126 degrees of freedom
## Multiple R-squared:  0.03518,    Adjusted R-squared:  0.02752 
## F-statistic: 4.594 on 1 and 126 DF,  p-value: 0.034
plot_GDPpc_efficiency_lin_model(crop_names[17],  food_prod_GCAM_FAO)
## [1] "Dairy"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.27540 -0.08160 -0.04350  0.05059  0.45075 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.772306   0.017024  45.367  < 2e-16 ***
## GDPpc_thous1990USD 0.009040   0.001119   8.082 4.48e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1522 on 126 degrees of freedom
## Multiple R-squared:  0.3414, Adjusted R-squared:  0.3362 
## F-statistic: 65.32 on 1 and 126 DF,  p-value: 4.476e-13
plot_GDPpc_efficiency_lin_model(crop_names[18],  food_prod_GCAM_FAO)
## [1] "Pork"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2070 -0.2873 -0.1724  0.3267  1.3237 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.238896   0.059120  54.785  < 2e-16 ***
## GDPpc_thous1990USD -0.013492   0.003884  -3.473 0.000705 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5286 on 126 degrees of freedom
## Multiple R-squared:  0.08738,    Adjusted R-squared:  0.08014 
## F-statistic: 12.06 on 1 and 126 DF,  p-value: 0.0007051
plot_GDPpc_efficiency_lin_model(crop_names[19],  food_prod_GCAM_FAO)
## [1] "Poultry"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.15138 -0.04045 -0.01579  0.03940  0.26634 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.3028654  0.0081577 159.711   <2e-16 ***
## GDPpc_thous1990USD 0.0006975  0.0005360   1.301    0.195    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07294 on 126 degrees of freedom
## Multiple R-squared:  0.01326,    Adjusted R-squared:  0.005432 
## F-statistic: 1.694 on 1 and 126 DF,  p-value: 0.1955
plot_GDPpc_efficiency_lin_model(crop_names[20],  food_prod_GCAM_FAO)
## [1] "SheepGoat"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69838 -0.13628  0.02475  0.17652  0.46054 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.643843   0.026377   62.32   <2e-16 ***
## GDPpc_thous1990USD 0.021312   0.001733   12.30   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2359 on 126 degrees of freedom
## Multiple R-squared:  0.5455, Adjusted R-squared:  0.5419 
## F-statistic: 151.2 on 1 and 126 DF,  p-value: < 2.2e-16
plot_GDPpc_efficiency_lin_model(crop_names[21],  food_prod_GCAM_FAO)
## [1] "OtherMeat_Fish"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.49058 -0.23793 -0.08999  0.17509  1.27248 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.092100   0.038066  28.689   <2e-16 ***
## GDPpc_thous1990USD -0.006429   0.002501  -2.571   0.0113 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3404 on 126 degrees of freedom
## Multiple R-squared:  0.04983,    Adjusted R-squared:  0.04229 
## F-statistic: 6.608 on 1 and 126 DF,  p-value: 0.01132
# repeat for aggregated crops
plot_GDPpc_efficiency_lin_model("FoodDemand_Staples",  food_prod_GCAM_FAO_agg_sector %>% rename(stub.technology = supplysector))
## [1] "FoodDemand_Staples"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79768 -0.27001 -0.02309  0.26307  0.73696 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.550179   0.039308  64.876   <2e-16 ***
## GDPpc_thous1990USD -0.003446   0.002583  -1.334    0.185    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3515 on 126 degrees of freedom
## Multiple R-squared:  0.01393,    Adjusted R-squared:  0.006104 
## F-statistic:  1.78 on 1 and 126 DF,  p-value: 0.1846
plot_GDPpc_efficiency_lin_model("FoodDemand_NonStaples",  food_prod_GCAM_FAO_agg_sector %>% rename(stub.technology = supplysector))
## [1] "FoodDemand_NonStaples"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.25975 -0.08385 -0.00214  0.08047  0.35654 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.9539585  0.0135281  70.517  < 2e-16 ***
## GDPpc_thous1990USD 0.0027174  0.0008888   3.057  0.00273 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.121 on 126 degrees of freedom
## Multiple R-squared:  0.06906,    Adjusted R-squared:  0.06167 
## F-statistic: 9.347 on 1 and 126 DF,  p-value: 0.002728
plot_GDPpc_efficiency_lin_model("all",  food_prod_GCAM_FAO_agg_all %>% mutate(stub.technology = "all"))
## [1] "all"
## 
## Call:
## lm(formula = efficiency ~ GDPpc_thous1990USD, data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48080 -0.10569 -0.02116  0.10058  0.64944 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.543039   0.022322  69.127  < 2e-16 ***
## GDPpc_thous1990USD -0.007132   0.001467  -4.863 3.38e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1996 on 126 degrees of freedom
## Multiple R-squared:  0.158,  Adjusted R-squared:  0.1514 
## F-statistic: 23.65 on 1 and 126 DF,  p-value: 3.377e-06
# try with different function
plot_GDPpc_efficiency_exp_model <- function(crop, input_data) {
  sel_data <- input_data %>%
    filter(stub.technology == crop & year != 1975)
    # filter(stub.technology == crop & year == 2015)
  
  lin_reg <- lm(efficiency ~ log(GDPpc_thous1990USD), sel_data)
  lin_reg_summary <- summary(lin_reg)
  
  print(crop)
  print(lin_reg_summary)
  
  # get key values
  pval <- lin_reg_summary$coef[2,4]
  Rsq <- lin_reg_summary$adj.r.squared
  slope <- lin_reg_summary$coef[[2]]
  
  # plot
  x_for_lin_reg_plot <- c(min(sel_data$GDPpc_thous1990USD), max(sel_data$GDPpc_thous1990USD))
  plot_ly(sel_data) %>%
    add_trace(x = ~GDPpc_thous1990USD,
              y = ~efficiency,
              color = ~region,
              colors = palette36,
              type = "scatter",
              mode = "markers") %>%
    add_trace(x = seq(x_for_lin_reg_plot[1], x_for_lin_reg_plot[2]),
              y = predict(lin_reg, data.frame(GDPpc_thous1990USD = seq(x_for_lin_reg_plot[1], x_for_lin_reg_plot[2]))),
              type = "scatter",
              mode = "lines",
              line = list(dash = ifelse(pval <= 0.05, "solid", "dash")),
              name = "Regression",
              text = paste0("R squared: ", round(Rsq, 3), ", P value: ", round(pval, 3))) %>%
    layout(xaxis = list(title = "GDP (thousand 1990$ MER) per capita"),
           yaxis = list(title = "Pcal/Mt"),
           title = paste0(crop, " production efficiency (output / input crops) by GDP per capita\nGCAM-FAO data (1990, 2005, 2010, 2015)"))
  
  # ggplot(sel_data, aes(x = GDPpc_thous1990USD, y = efficiency, color = region)) +
  #   geom_point() +
  #   scale_color_manual(values = palette36, name = "region") + 
  #   labs(x = "GDP per capita (thousand 1990$)", y = "Pcal/Mt", title = paste0(crop, " production efficiency (output / input crops) by GDP per capita, GCAM-FAO data, 2015")) + 
  #   theme_classic()
  # 
  # ggsave(paste0(plot_dir, "/food_prod_food_efficiency_", crop, "_by_GDPpc_GCAM-FAO_2015.png"), width = 10, height = 6)
}

plot_GDPpc_efficiency_exp_model(crop_names[1], food_prod_GCAM_FAO)
## [1] "Corn"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8304 -0.1416  0.0318  0.1923  0.4771 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              3.37224    0.03443  97.942  < 2e-16 ***
## log(GDPpc_thous1990USD)  0.06463    0.01760   3.672 0.000354 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2629 on 126 degrees of freedom
## Multiple R-squared:  0.09669,    Adjusted R-squared:  0.08952 
## F-statistic: 13.49 on 1 and 126 DF,  p-value: 0.0003537
plot_GDPpc_efficiency_exp_model(crop_names[2], food_prod_GCAM_FAO)
## [1] "FiberCrop"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.86583 -0.37751 -0.00444  0.18714  2.43397 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              6.11216    0.08345   73.24   <2e-16 ***
## log(GDPpc_thous1990USD) -0.04009    0.04265   -0.94    0.349    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6373 on 126 degrees of freedom
## Multiple R-squared:  0.006963,   Adjusted R-squared:  -0.0009182 
## F-statistic: 0.8835 on 1 and 126 DF,  p-value: 0.349
plot_GDPpc_efficiency_exp_model(crop_names[3],  food_prod_GCAM_FAO)
## [1] "Fruits"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.184880 -0.037427 -0.008132  0.038444  0.119447 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              0.515340   0.008446  61.015   <2e-16 ***
## log(GDPpc_thous1990USD) -0.041701   0.004317  -9.659   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0645 on 126 degrees of freedom
## Multiple R-squared:  0.4255, Adjusted R-squared:  0.4209 
## F-statistic:  93.3 on 1 and 126 DF,  p-value: < 2.2e-16
plot_GDPpc_efficiency_exp_model(crop_names[4],  food_prod_GCAM_FAO)
## [1] "Legumes"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.122534 -0.027139  0.001711  0.024056  0.085270 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              3.429964   0.005770  594.40   <2e-16 ***
## log(GDPpc_thous1990USD) -0.006871   0.002949   -2.33   0.0214 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04407 on 126 degrees of freedom
## Multiple R-squared:  0.0413, Adjusted R-squared:  0.03369 
## F-statistic: 5.428 on 1 and 126 DF,  p-value: 0.02141
plot_GDPpc_efficiency_exp_model(crop_names[5],  food_prod_GCAM_FAO)
## [1] "MiscCrop"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.11024 -0.49540  0.04485  0.42096  1.84721 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              1.93877    0.08341   23.24   <2e-16 ***
## log(GDPpc_thous1990USD) -0.09507    0.04263   -2.23   0.0275 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.637 on 126 degrees of freedom
## Multiple R-squared:  0.03796,    Adjusted R-squared:  0.03033 
## F-statistic: 4.972 on 1 and 126 DF,  p-value: 0.02753
plot_GDPpc_efficiency_exp_model(crop_names[6],  food_prod_GCAM_FAO)
## [1] "NutsSeeds"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84450 -0.18360  0.08668  0.21103  0.73087 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              3.65269    0.04575   79.83  < 2e-16 ***
## log(GDPpc_thous1990USD) -0.07390    0.02339   -3.16  0.00198 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3494 on 126 degrees of freedom
## Multiple R-squared:  0.07342,    Adjusted R-squared:  0.06607 
## F-statistic: 9.985 on 1 and 126 DF,  p-value: 0.001977
plot_GDPpc_efficiency_exp_model(crop_names[7],  food_prod_GCAM_FAO)
## [1] "OilCrop"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1242 -1.2478 -0.1328  1.5469  2.7709 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               4.5518     0.2167  21.004  < 2e-16 ***
## log(GDPpc_thous1990USD)   0.3399     0.1108   3.069  0.00263 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.655 on 126 degrees of freedom
## Multiple R-squared:  0.06955,    Adjusted R-squared:  0.06217 
## F-statistic: 9.419 on 1 and 126 DF,  p-value: 0.00263
plot_GDPpc_efficiency_exp_model(crop_names[8],  food_prod_GCAM_FAO)
## [1] "Soybean"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -2.734 -1.754  0.465  1.223  2.309 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               7.3635     0.1970  37.372   <2e-16 ***
## log(GDPpc_thous1990USD)  -0.2128     0.1007  -2.113   0.0366 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.505 on 126 degrees of freedom
## Multiple R-squared:  0.03423,    Adjusted R-squared:  0.02656 
## F-statistic: 4.465 on 1 and 126 DF,  p-value: 0.03656
plot_GDPpc_efficiency_exp_model(crop_names[9],  food_prod_GCAM_FAO)
## [1] "OtherGrain"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.16369 -0.30923 -0.09021  0.31701  1.13232 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              3.39410    0.06595  51.467  < 2e-16 ***
## log(GDPpc_thous1990USD) -0.20145    0.03371  -5.976 2.18e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5036 on 126 degrees of freedom
## Multiple R-squared:  0.2209, Adjusted R-squared:  0.2147 
## F-statistic: 35.72 on 1 and 126 DF,  p-value: 2.184e-08
plot_GDPpc_efficiency_exp_model(crop_names[10],  food_prod_GCAM_FAO)
## [1] "OilPalm"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32678 -0.03065 -0.01290  0.01429  0.40265 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             2.384428   0.015274 156.113   <2e-16 ***
## log(GDPpc_thous1990USD) 0.011355   0.007807   1.454    0.148    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1166 on 126 degrees of freedom
## Multiple R-squared:  0.01651,    Adjusted R-squared:  0.008707 
## F-statistic: 2.115 on 1 and 126 DF,  p-value: 0.1483
plot_GDPpc_efficiency_exp_model(crop_names[11],  food_prod_GCAM_FAO)
## [1] "Rice"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.21031 -0.07607 -0.01667  0.09417  0.26192 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             2.687474   0.013890 193.483   <2e-16 ***
## log(GDPpc_thous1990USD) 0.002807   0.007100   0.395    0.693    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1061 on 126 degrees of freedom
## Multiple R-squared:  0.001239,   Adjusted R-squared:  -0.006688 
## F-statistic: 0.1563 on 1 and 126 DF,  p-value: 0.6933
plot_GDPpc_efficiency_exp_model(crop_names[12],  food_prod_GCAM_FAO)
## [1] "RootTuber"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.148104 -0.058069 -0.008527  0.050929  0.230991 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              0.80748    0.01125  71.781  < 2e-16 ***
## log(GDPpc_thous1990USD) -0.03991    0.00575  -6.942 1.82e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0859 on 126 degrees of freedom
## Multiple R-squared:  0.2767, Adjusted R-squared:  0.2709 
## F-statistic: 48.19 on 1 and 126 DF,  p-value: 1.821e-10
plot_GDPpc_efficiency_exp_model(crop_names[13],  food_prod_GCAM_FAO)
## [1] "SugarCrop"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.166925 -0.027220  0.007122  0.024166  0.158195 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             0.541448   0.006907  78.386  < 2e-16 ***
## log(GDPpc_thous1990USD) 0.019867   0.003531   5.627 1.13e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05275 on 126 degrees of freedom
## Multiple R-squared:  0.2008, Adjusted R-squared:  0.1945 
## F-statistic: 31.66 on 1 and 126 DF,  p-value: 1.13e-07
plot_GDPpc_efficiency_exp_model(crop_names[14],  food_prod_GCAM_FAO)
## [1] "Vegetables"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.056326 -0.029180 -0.000534  0.016615  0.087538 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              0.300006   0.004753  63.116  < 2e-16 ***
## log(GDPpc_thous1990USD) -0.010528   0.002430  -4.333 2.97e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0363 on 126 degrees of freedom
## Multiple R-squared:  0.1297, Adjusted R-squared:  0.1228 
## F-statistic: 18.78 on 1 and 126 DF,  p-value: 2.971e-05
plot_GDPpc_efficiency_exp_model(crop_names[15],  food_prod_GCAM_FAO)
## [1] "Wheat"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28075 -0.07501  0.01831  0.07802  0.23119 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              3.495404   0.014686 238.011  < 2e-16 ***
## log(GDPpc_thous1990USD) -0.024373   0.007506  -3.247  0.00149 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1121 on 126 degrees of freedom
## Multiple R-squared:  0.07721,    Adjusted R-squared:  0.06989 
## F-statistic: 10.54 on 1 and 126 DF,  p-value: 0.001495
plot_GDPpc_efficiency_exp_model(crop_names[16],  food_prod_GCAM_FAO)
## [1] "Beef"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7690 -0.2669  0.0031  0.2163  0.7018 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              1.59622    0.04650  34.331   <2e-16 ***
## log(GDPpc_thous1990USD) -0.02345    0.02377  -0.987    0.326    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3551 on 126 degrees of freedom
## Multiple R-squared:  0.007669,   Adjusted R-squared:  -0.0002064 
## F-statistic: 0.9738 on 1 and 126 DF,  p-value: 0.3256
plot_GDPpc_efficiency_exp_model(crop_names[17],  food_prod_GCAM_FAO)
## [1] "Dairy"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.25027 -0.10485  0.00109  0.08649  0.39348 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              0.73388    0.01959   37.46  < 2e-16 ***
## log(GDPpc_thous1990USD)  0.08501    0.01001    8.49 4.88e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1496 on 126 degrees of freedom
## Multiple R-squared:  0.3639, Adjusted R-squared:  0.3588 
## F-statistic: 72.07 on 1 and 126 DF,  p-value: 4.876e-14
plot_GDPpc_efficiency_exp_model(crop_names[18],  food_prod_GCAM_FAO)
## [1] "Pork"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1191 -0.3477 -0.1498  0.3481  1.0759 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              3.30886    0.06850  48.304  < 2e-16 ***
## log(GDPpc_thous1990USD) -0.13562    0.03501  -3.873 0.000172 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5231 on 126 degrees of freedom
## Multiple R-squared:  0.1064, Adjusted R-squared:  0.09931 
## F-statistic:    15 on 1 and 126 DF,  p-value: 0.0001716
plot_GDPpc_efficiency_exp_model(crop_names[19],  food_prod_GCAM_FAO)
## [1] "Poultry"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.14346 -0.04138 -0.01848  0.03572  0.27640 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             1.298175   0.009521 136.354   <2e-16 ***
## log(GDPpc_thous1990USD) 0.007755   0.004866   1.594    0.114    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0727 on 126 degrees of freedom
## Multiple R-squared:  0.01976,    Adjusted R-squared:  0.01198 
## F-statistic:  2.54 on 1 and 126 DF,  p-value: 0.1135
plot_GDPpc_efficiency_exp_model(crop_names[20],  food_prod_GCAM_FAO)
## [1] "SheepGoat"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.82169 -0.12688  0.01346  0.17539  0.59391 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              1.56941    0.03180   49.36   <2e-16 ***
## log(GDPpc_thous1990USD)  0.18922    0.01625   11.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2428 on 126 degrees of freedom
## Multiple R-squared:  0.5183, Adjusted R-squared:  0.5144 
## F-statistic: 135.5 on 1 and 126 DF,  p-value: < 2.2e-16
plot_GDPpc_efficiency_exp_model(crop_names[21],  food_prod_GCAM_FAO)
## [1] "OtherMeat_Fish"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.59622 -0.20207 -0.07554  0.19368  1.18837 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              1.14428    0.04368  26.199  < 2e-16 ***
## log(GDPpc_thous1990USD) -0.07768    0.02232  -3.479 0.000691 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3335 on 126 degrees of freedom
## Multiple R-squared:  0.08766,    Adjusted R-squared:  0.08042 
## F-statistic: 12.11 on 1 and 126 DF,  p-value: 0.0006906
# repeat for aggregated crops
plot_GDPpc_efficiency_exp_model("FoodDemand_Staples",  food_prod_GCAM_FAO_agg_sector %>% rename(stub.technology = supplysector))
## [1] "FoodDemand_Staples"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77637 -0.26820 -0.02835  0.26484  0.75621 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              2.525494   0.046343  54.496   <2e-16 ***
## log(GDPpc_thous1990USD) -0.005158   0.023687  -0.218    0.828    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3539 on 126 degrees of freedom
## Multiple R-squared:  0.0003762,  Adjusted R-squared:  -0.007557 
## F-statistic: 0.04742 on 1 and 126 DF,  p-value: 0.828
plot_GDPpc_efficiency_exp_model("FoodDemand_NonStaples",  food_prod_GCAM_FAO_agg_sector %>% rename(stub.technology = supplysector))
## [1] "FoodDemand_NonStaples"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.26968 -0.08902  0.00099  0.07949  0.35392 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             0.957871   0.016212  59.082   <2e-16 ***
## log(GDPpc_thous1990USD) 0.014843   0.008287   1.791   0.0757 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1238 on 126 degrees of freedom
## Multiple R-squared:  0.02483,    Adjusted R-squared:  0.01709 
## F-statistic: 3.208 on 1 and 126 DF,  p-value: 0.07567
plot_GDPpc_efficiency_exp_model("all",  food_prod_GCAM_FAO_agg_all %>% mutate(stub.technology = "all"))
## [1] "all"
## 
## Call:
## lm(formula = efficiency ~ log(GDPpc_thous1990USD), data = sel_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.43516 -0.09641 -0.02280  0.10390  0.64645 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              1.60668    0.02376  67.627  < 2e-16 ***
## log(GDPpc_thous1990USD) -0.09016    0.01214  -7.425 1.49e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1814 on 126 degrees of freedom
## Multiple R-squared:  0.3043, Adjusted R-squared:  0.2988 
## F-statistic: 55.12 on 1 and 126 DF,  p-value: 1.488e-11

GTAP data

# plot detailed data across regions

# calculate shares
CostShare_FoodProcessing_GTAP <- CostShare_FoodProcessing_GTAP %>%
  group_by(year, output, region_GCAM, region_GCAM_abb) %>%
  mutate(share = value / sum(value),
         value = value / 1000)

# plot detailed
ggplot(CostShare_FoodProcessing_GTAP %>% filter(year == 2014),
       aes(x = region_GCAM_abb, y = value, fill = input_AGG_Detail)) +
  geom_col() +
  scale_fill_manual(values = palette36, name = "Cost") +
  facet_wrap(~output, ncol = 2, scale = "free") +
  labs(x = "Region", y = "$ Billion", title = "Food processing sectors value cost (agent prices), 2014, GTAP") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_pro_costs_by_type_reg_detailed_GTAP_2014.png"), width = 12, height = 15)

# plot detailed shares
ggplot(CostShare_FoodProcessing_GTAP %>% filter(year == 2014),
       aes(x = region_GCAM_abb, y = share, fill = input_AGG_Detail)) +
  geom_col() +
  scale_fill_manual(values = palette36, name = "Cost") +
  facet_wrap(~output, ncol = 2) +
  labs(x = "Region", y = "Share", title = "Food processing sectors value cost shares (agent prices), 2014, GTAP") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_pro_cost_shares_by_type_reg_detailed_GTAP_2014.png"), width = 12, height = 15)

# aggregate to summarized cost type and calculate shares
CostShare_FoodProcessing_GTAP_agg <- CostShare_FoodProcessing_GTAP %>%
  group_by(year, output, region_GCAM, region_GCAM_abb, input_AGG) %>%
  summarize(value = sum(value)) %>%
  group_by(year, output, region_GCAM) %>%
  mutate(share = value / sum(value))

# plot the shares and values by region
ggplot(CostShare_FoodProcessing_GTAP_agg %>% filter(year == 2014),
       aes(x = region_GCAM_abb, y = value, fill = input_AGG)) +
  geom_col() +
  scale_fill_d3(palette = "category20", name = "Cost") +
  facet_wrap(~output, ncol = 2, scale = "free") +
  labs(x = "Region", y = "$ Billion", title = "Food processing sectors value cost (agent prices), 2014, GTAP") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_pro_costs_by_type_reg_GTAP_2014.png"), width = 12, height = 15)

ggplot(CostShare_FoodProcessing_GTAP_agg %>% filter(year == 2014),
       aes(x = region_GCAM_abb, y = share, fill = input_AGG)) +
  geom_col() +
  scale_fill_d3(palette = "category20", name = "Cost") +
  facet_wrap(~output, ncol = 2, scale = "free") +
  labs(x = "Region", y = "Share", title = "Food processing sectors value cost shares (agent prices), 2014, GTAP") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_pro_cost_shares_by_type_reg_GTAP_2014.png"), width = 12, height = 15)

# plot just energy, capital, labor portions
ggplot(CostShare_FoodProcessing_GTAP_agg %>% filter(year == 2014 & input_AGG %in% c("Energy", "Capital", "Labor")),
       aes(x = region_GCAM_abb, y = value, fill = input_AGG)) +
  geom_bar(position = "dodge", stat = "identity") +
  scale_fill_d3(palette = "category20", name = "Cost") +
  facet_wrap(~output, ncol = 2, scale = "free") +
  labs(x = "Region", y = "$ Billion", title = "Food processing sectors value cost (agent prices), 2014, GTAP") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_pro_costs_sel_reg_GTAP_2014.png"), width = 12, height = 15)

ggplot(CostShare_FoodProcessing_GTAP_agg %>% filter(year == 2014 & input_AGG %in% c("Energy", "Capital", "Labor")),
       aes(x = region_GCAM_abb, y = share, fill = input_AGG)) +
  geom_bar(position = "dodge", stat = "identity") +
  scale_fill_d3(palette = "category20", name = "Cost") +
  facet_wrap(~output, ncol = 2, scale = "free") +
  labs(x = "Region", y = "Share", title = "Food processing sectors value cost shares (agent prices), 2014, GTAP") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_pro_cost_shares_sel_reg_GTAP_2014.png"), width = 12, height = 15)

# plot just energy costs for overall food processing
ggplot(CostShare_FoodProcessing_GTAP_agg %>% filter(year == 2014 & input_AGG == "Energy" & output == "FoodProduct"),
       aes(x = reorder(region_GCAM, value, FUN = identity), y = value)) +
  geom_bar(position = "dodge", stat = "identity", fill = "steelblue") +
  labs(x = "Region", y = "$ Billion", title = "FoodProduct energy cost (agent prices), 2014, GTAP") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_pro_costs_energy_FoodProduct_reg_GTAP_2014.png"), width = 10, height = 6)

ggplot(CostShare_FoodProcessing_GTAP_agg %>% filter(year == 2014 & input_AGG == "Energy" & output == "FoodProduct"),
       aes(x = reorder(region_GCAM, share, FUN = identity), y = share)) +
  geom_bar(position = "dodge", stat = "identity", fill = "steelblue") +
  labs(x = "Region", y = "Share", title = "FoodProduct energy cost shares (agent prices), 2014, GTAP") + 
  theme_classic() + 
  theme(axis.text.x = element_text(size=12, angle=90, vjust=0.5, hjust=1))

ggsave(paste0(plot_dir, "/food_pro_cost_shares_energy_FoodProduct_reg_GTAP_2014.png"), width = 10, height = 6)